#define main jpegMain

#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"

/*
   In the current system design, the main buffer need never be a full-image
   buffer; any full-height buffers will be found inside the coefficient or
   postprocessing controllers.  Nonetheless, the main controller is not
   trivial.  Its responsibility is to provide context rows for upsampling/
   rescaling, and doing this in an efficient fashion is a bit tricky.

   Postprocessor input data is counted in "row groups".  A row group
   is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
   sample rows of each component.  (We require DCT_scaled_size values to be
   chosen such that these numbers are integers.  In practice DCT_scaled_size
   values will likely be powers of two, so we actually have the stronger
   condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
   Upsampling will typically produce max_v_samp_factor pixel rows from each
   row group (times any additional scale factor that the upsampler is
   applying).

   The coefficient controller will deliver data to us one iMCU row at a time;
   each iMCU row contains v_samp_factor * DCT_scaled_size sample rows, or
   exactly min_DCT_scaled_size row groups.  (This amount of data corresponds
   to one row of MCUs when the image is fully interleaved.)  Note that the
   number of sample rows varies across components, but the number of row
   groups does not.  Some garbage sample rows may be included in the last iMCU
   row at the bottom of the image.

   Depending on the vertical scaling algorithm used, the upsampler may need
   access to the sample row(s) above and below its current input row group.
   The upsampler is required to set need_context_rows TRUE at global selection
   time if so.  When need_context_rows is FALSE, this controller can simply
   obtain one iMCU row at a time from the coefficient controller and dole it
   out as row groups to the postprocessor.

   When need_context_rows is TRUE, this controller guarantees that the buffer
   passed to postprocessing contains at least one row group's worth of samples
   above and below the row group(s) being processed.  Note that the context
   rows "above" the first passed row group appear at negative row offsets in
   the passed buffer.  At the top and bottom of the image, the required
   context rows are manufactured by duplicating the first or last real sample
   row; this avoids having special cases in the upsampling inner loops.

   The amount of context is fixed at one row group just because that's a
   convenient number for this controller to work with.  The existing
   upsamplers really only need one sample row of context.  An upsampler
   supporting arbitrary output rescaling might wish for more than one row
   group of context when shrinking the image; tough, we don't handle that.
   (This is justified by the assumption that downsizing will be handled mostly
   by adjusting the DCT_scaled_size values, so that the actual scale factor at
   the upsample step needn't be much less than one.)

   To provide the desired context, we have to retain the last two row groups
   of one iMCU row while reading in the next iMCU row.  (The last row group
   can't be processed until we have another row group for its below-context,
   and so we have to save the next-to-last group too for its above-context.)
   We could do this most simply by copying data around in our buffer, but
   that'd be very slow.  We can avoid copying any data by creating a rather
   strange pointer structure.  Here's how it works.  We allocate a workspace
   consisting of M+2 row groups (where M = min_DCT_scaled_size is the number
   of row groups per iMCU row).  We create two sets of redundant pointers to
   the workspace.  Labeling the physical row groups 0 to M+1, the synthesized
   pointer lists look like this:
                     M+1                          M-1
   master pointer --> 0         master pointer --> 0
                      1                            1
                     ...                          ...
                     M-3                          M-3
                     M-2                           M
                     M-1                          M+1
                      M                           M-2
                     M+1                          M-1
                      0                            0
   We read alternate iMCU rows using each master pointer; thus the last two
   row groups of the previous iMCU row remain un-overwritten in the workspace.
   The pointer lists are set up so that the required context rows appear to
   be adjacent to the proper places when we pass the pointer lists to the
   upsampler.

   The above pictures describe the normal state of the pointer lists.
   At top and bottom of the image, we diddle the pointer lists to duplicate
   the first or last sample row as necessary (this is cheaper than copying
   sample rows around).

   This scheme breaks down if M < 2, i.e., min_DCT_scaled_size is 1.  In that
   situation each iMCU row provides only one row group so the buffering logic
   must be different (e.g., we must read two iMCU rows before we can emit the
   first row group).  For now, we simply do not support providing context
   rows when min_DCT_scaled_size is 1.  That combination seems unlikely to
   be worth providing --- if someone wants a 1/8th-size preview, they probably
   want it quick and dirty, so a context-free upsampler is sufficient.
*/


/* Private buffer controller object */

/*
   Private state of the decompression main buffer controller.
   The object is allocated in jinit_d_main_controller and stored in
   cinfo->main, so the public part must remain the first member.
*/
typedef struct {
  struct jpeg_d_main_controller pub; /* public fields */

  /* Pointer to allocated workspace (M or M+2 row groups).
     One sample array per component, allocated in jinit_d_main_controller.
  */
  JSAMPARRAY buffer[MAX_COMPONENTS];

  /* Set TRUE once decompress_data has delivered an iMCU row; reset to FALSE
     when that row has been handed on and a new one must be read.
  */
  wxjpeg_boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
  /* Passed by address to post_process_data, which advances it. */
  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */

  /* Remaining fields are only used in the context case. */

  /* These are the master pointers to the funny-order pointer lists.
     xbuffer[0] and xbuffer[1] are allocated by alloc_funny_pointers and
     filled in by make_funny_pointers.
  */
  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */

  /* Index (0 or 1) into xbuffer[]; toggled after each iMCU row. */
  int whichptr;			/* indicates which pointer set is now in use */
  /* One of the CTX_xxx values defined below. */
  int context_state;		/* process_data state machine status */
  /* Upper bound handed to post_process_data together with rowgroup_ctr. */
  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
  /* Incremented each time an iMCU row is read in the context case. */
  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
} my_main_controller;

/* Convenience pointer type used when casting cinfo->main. */
typedef my_main_controller * my_main_ptr;

/* context_state values: states of the switch in process_data_context_main.
   Each state falls through to the next one once its work is complete;
   CTX_PROCESS_IMCU finishes by selecting CTX_POSTPONED_ROW for the next call.
*/
#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for MCU row */
#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */


#if defined(__VISAGECPP__)
/* Visual Age fixups for multiple declarations.
   Under this compiler the two method names below are renamed, for the rest
   of this file, to the "...2" identifiers so that they do not collide with
   the same-named functions noted as already existing in jcmaint.c.
*/
#  define start_pass_main   start_pass_main2 /* already in jcmaint.c */
#  define process_data_simple_main process_data_simple_main2 /* already in jcmaint.c */
#endif

/* Forward declarations of the process_data methods.
   start_pass_main stores one of these into pub.process_data, so they must be
   declared before it; their definitions follow later in this file.
*/
/* No-context case: hands whole iMCU rows to the postprocessor. */
METHODDEF( void ) process_data_simple_main
JPP( ( j_decompress_ptr cinfo, JSAMPARRAY output_buf,
       JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail ) );
/* Context case: uses the xbuffer[] pointer lists. */
METHODDEF( void ) process_data_context_main
JPP( ( j_decompress_ptr cinfo, JSAMPARRAY output_buf,
       JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail ) );
#ifdef QUANT_2PASS_SUPPORTED
/* JBUF_CRANK_DEST case: only calls the postprocessor, with no input data. */
METHODDEF( void ) process_data_crank_post
JPP( ( j_decompress_ptr cinfo, JSAMPARRAY output_buf,
       JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail ) );
#endif


/*
   Allocate the storage for the two funny-order pointer lists (xbuffer[0]
   and xbuffer[1]).  Called from jinit_d_main_controller when the upsampler
   needs context rows.  Only the space is obtained here; the row pointers
   themselves are written by make_funny_pointers.
*/
static void alloc_funny_pointers( j_decompress_ptr cinfo ) {
  my_main_ptr mainp = ( my_main_ptr ) cinfo->main;
  const int M = cinfo->min_DCT_scaled_size;
  jpeg_component_info *comp;
  JSAMPARRAY list;
  int c;
  int group_rows;		/* sample rows in one row group of this component */
  int list_rows;		/* row pointers reserved for one list of this component */

  /* A single request supplies the per-component top level of both lists;
     the second list begins directly after the first.
  */
  mainp->xbuffer[0] = ( JSAMPIMAGE )
                      ( *cinfo->mem->alloc_small )( ( j_common_ptr ) cinfo, JPOOL_IMAGE,
                          cinfo->num_components * 2 * SIZEOF( JSAMPARRAY ) );
  mainp->xbuffer[1] = mainp->xbuffer[0] + cinfo->num_components;

  for( c = 0; c < cinfo->num_components; c++ ) {
    comp = cinfo->comp_info + c;
    group_rows = ( comp->v_samp_factor * comp->DCT_scaled_size ) / M;
    list_rows = group_rows * ( M + 4 );
    /* Both lists of this component come from one allocation. */
    list = ( JSAMPARRAY )
           ( *cinfo->mem->alloc_small )( ( j_common_ptr ) cinfo, JPOOL_IMAGE,
                                         2 * list_rows * SIZEOF( JSAMPROW ) );
    /* Skip one row group so that it can be addressed at negative offsets. */
    list += group_rows;
    mainp->xbuffer[0][c] = list;
    mainp->xbuffer[1][c] = list + list_rows;
  }
}


/*
   Fill in the two funny-order pointer lists from the real workspace rows in
   main->buffer[].  Called from start_pass_main for the context-rows case.
   The "below" wraparound entries are not written here; see
   set_wraparound_pointers.
*/
static void make_funny_pointers( j_decompress_ptr cinfo ) {
  my_main_ptr mainp = ( my_main_ptr ) cinfo->main;
  const int M = cinfo->min_DCT_scaled_size;
  jpeg_component_info *comp;
  JSAMPARRAY rows, list0, list1;
  JSAMPROW row;
  int c, k;
  int group_rows;		/* sample rows in one row group of this component */
  int total_rows;		/* sample rows in the M+2 workspace row groups */
  int tail_lo, tail_hi;		/* first rows of groups M-2 and M */

  for( c = 0; c < cinfo->num_components; c++ ) {
    comp = cinfo->comp_info + c;
    group_rows = ( comp->v_samp_factor * comp->DCT_scaled_size ) / M;
    rows = mainp->buffer[c];
    list0 = mainp->xbuffer[0][c];
    list1 = mainp->xbuffer[1][c];

    /* Start with both lists mirroring the workspace in physical order. */
    total_rows = group_rows * ( M + 2 );
    for( k = 0; k < total_rows; k++ ) {
      row = rows[k];
      list0[k] = row;
      list1[k] = row;
    }

    /* In the second list only, exchange groups M-2,M-1 with groups M,M+1. */
    tail_lo = group_rows * ( M - 2 );
    tail_hi = group_rows * M;
    for( k = 0; k < group_rows * 2; k++ ) {
      list1[tail_lo + k] = rows[tail_hi + k];
      list1[tail_hi + k] = rows[tail_lo + k];
    }

    /* Initially every "above" entry of the first list duplicates the first
       real row.
    */
    row = list0[0];
    for( k = 1; k <= group_rows; k++ ) {
      list0[-k] = row;
    }
  }
}


/*
   Set up the "wraparound" entries at the top and bottom of both pointer
   lists.  Called from process_data_context_main after the first iMCU row
   has been fed to the postprocessor.
*/
static void set_wraparound_pointers( j_decompress_ptr cinfo ) {
  my_main_ptr mainp = ( my_main_ptr ) cinfo->main;
  const int M = cinfo->min_DCT_scaled_size;
  jpeg_component_info *comp;
  JSAMPARRAY list;
  int c, which, k;
  int group_rows;		/* sample rows in one row group of this component */
  int last_group;		/* first row of list group M+1 */
  int below_group;		/* first row of the slot just past group M+1 */

  for( c = 0; c < cinfo->num_components; c++ ) {
    comp = cinfo->comp_info + c;
    group_rows = ( comp->v_samp_factor * comp->DCT_scaled_size ) / M;
    last_group = group_rows * ( M + 1 );
    below_group = group_rows * ( M + 2 );
    /* The same wiring is applied to each of the two lists. */
    for( which = 0; which < 2; which++ ) {
      list = mainp->xbuffer[which][c];
      for( k = 0; k < group_rows; k++ ) {
        /* "above" context (negative offsets) aliases list group M+1 */
        list[k - group_rows] = list[last_group + k];
        /* the slot below the list aliases list group 0 */
        list[below_group + k] = list[k];
      }
    }
  }
}

/*
   Adjust the pointer list currently in use for the last iMCU row of the
   image: the last real sample row is repeated below itself, and
   rowgroups_avail is reduced to the number of row groups that contain
   real data.  Called from process_data_context_main.
*/
static void set_bottom_pointers( j_decompress_ptr cinfo ) {
  my_main_ptr mainp = ( my_main_ptr ) cinfo->main;
  jpeg_component_info *comp;
  JSAMPARRAY list;
  JSAMPROW last_row;
  int c, k;
  int imcu_rows;		/* sample rows in one iMCU row of this component */
  int group_rows;		/* sample rows in one row group of this component */
  int real_rows;		/* nondummy sample rows in the final iMCU row */

  for( c = 0; c < cinfo->num_components; c++ ) {
    comp = cinfo->comp_info + c;
    imcu_rows = comp->v_samp_factor * comp->DCT_scaled_size;
    group_rows = imcu_rows / cinfo->min_DCT_scaled_size;

    /* A remainder of zero means the final iMCU row is completely real. */
    real_rows = ( int )( comp->downsampled_height % ( JDIMENSION ) imcu_rows );
    if( real_rows == 0 ) {
      real_rows = imcu_rows;
    }

    /* The row-group count is taken from the first component only; it
       should come out the same for every component.
    */
    if( c == 0 ) {
      mainp->rowgroups_avail = ( JDIMENSION )( ( real_rows - 1 ) / group_rows + 1 );
    }

    /* Repeat the last real row over the next two row groups' worth of
       entries: this pads a partial row group and supplies a full row group
       of context below it.
    */
    list = mainp->xbuffer[mainp->whichptr][c];
    last_row = list[real_rows - 1];
    for( k = real_rows; k < real_rows + group_rows * 2; k++ ) {
      list[k] = last_row;
    }
  }
}


/*
   Initialize for a processing pass.
   Selects the process_data method that matches pass_mode and resets the
   per-pass state; any unsupported mode is reported with
   JERR_BAD_BUFFER_MODE.
*/

METHODDEF( void )
start_pass_main( j_decompress_ptr cinfo, J_BUF_MODE pass_mode ) {
  my_main_ptr mainp = ( my_main_ptr ) cinfo->main;

  if( pass_mode == JBUF_PASS_THRU ) {
    if( ! cinfo->upsample->need_context_rows ) {
      /* No context required: the plain buffer is handed on as-is */
      mainp->pub.process_data = process_data_simple_main;
    } else {
      mainp->pub.process_data = process_data_context_main;
      make_funny_pointers( cinfo );	/* build the xbuffer[] lists */
      mainp->whichptr = 0;		/* first iMCU row goes into xbuffer[0] */
      mainp->context_state = CTX_PREPARE_FOR_IMCU;
      mainp->iMCU_row_ctr = 0;
    }
    /* Common to both variants: start with an empty buffer */
    mainp->buffer_full = FALSE;
    mainp->rowgroup_ctr = 0;
    return;
  }

#ifdef QUANT_2PASS_SUPPORTED
  if( pass_mode == JBUF_CRANK_DEST ) {
    /* Last pass of 2-pass quantization: only the postprocessor runs */
    mainp->pub.process_data = process_data_crank_post;
    return;
  }
#endif

  ERREXIT( cinfo, JERR_BAD_BUFFER_MODE );
}


/*
   Process some data: the simple case, in which no context rows are needed.
   One iMCU row is fetched from the coefficient controller when the buffer
   is empty and is then offered to the postprocessor until it has been
   consumed completely.
*/

METHODDEF( void )
process_data_simple_main( j_decompress_ptr cinfo,
                          JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
                          JDIMENSION out_rows_avail ) {
  my_main_ptr mainp = ( my_main_ptr ) cinfo->main;
  JDIMENSION groups_in_imcu;

  /* Fetch an iMCU row unless one is already waiting in the buffer */
  if( ! mainp->buffer_full ) {
    if( ( *cinfo->coef->decompress_data )( cinfo, mainp->buffer ) == 0 ) {
      return;			/* suspended: nothing can be done for now */
    }
    mainp->buffer_full = TRUE;
  }

  /* Every iMCU row holds min_DCT_scaled_size row groups.  At the bottom of
     the image some of them may be garbage; the postprocessor checks for the
     image bottom itself, so no check is made here.
  */
  groups_in_imcu = ( JDIMENSION ) cinfo->min_DCT_scaled_size;

  ( *cinfo->post->post_process_data )( cinfo, mainp->buffer,
                                       &mainp->rowgroup_ctr, groups_in_imcu,
                                       output_buf, out_row_ctr, out_rows_avail );

  /* Data left over: keep the buffer for the next call */
  if( mainp->rowgroup_ctr < groups_in_imcu ) {
    return;
  }

  /* Everything was consumed, so the buffer is empty again */
  mainp->buffer_full = FALSE;
  mainp->rowgroup_ctr = 0;
}


/*
   Process some data.
   This handles the case where context rows must be provided.

   The function is a resumable state machine driven by main->context_state.
   Each call first reads a new iMCU row (if the buffer is empty) through the
   pointer list selected by main->whichptr, then resumes feeding the
   postprocessor at the recorded state.  It returns early whenever the
   coefficient controller suspends or the postprocessor has not yet consumed
   all row groups offered to it.
*/

METHODDEF( void )
process_data_context_main( j_decompress_ptr cinfo,
                           JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
                           JDIMENSION out_rows_avail ) {
  my_main_ptr main = ( my_main_ptr ) cinfo->main;
  /* Read input data if we haven't filled the main buffer yet */
  if( ! main->buffer_full ) {
    if( !( *cinfo->coef->decompress_data )( cinfo,
                                            main->xbuffer[main->whichptr] ) )
    { return; }			/* suspension forced, can do nothing more */
    main->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
    main->iMCU_row_ctr++;	/* count rows received */
  }
  /* Postprocessor typically will not swallow all the input data it is handed
     in one call (due to filling the output buffer first).  Must be prepared
     to exit and restart.  This switch lets us keep track of how far we got.
     Note that each case falls through to the next on successful completion.
  */
  switch( main->context_state ) {
    case CTX_POSTPONED_ROW:
      /* Call postprocessor using previously set pointers for postponed row.
         rowgroup_ctr/rowgroups_avail were set to M+1/M+2 at the end of the
         previous iMCU row, so exactly one row group is offered here.
      */
      ( *cinfo->post->post_process_data )( cinfo, main->xbuffer[main->whichptr],
                                           &main->rowgroup_ctr, main->rowgroups_avail,
                                           output_buf, out_row_ctr, out_rows_avail );
      if( main->rowgroup_ctr < main->rowgroups_avail )
      { return; }			/* Need to suspend */
      /* Record the new state before the possible return below, so that the
         next call starts with the preparation step.
      */
      main->context_state = CTX_PREPARE_FOR_IMCU;
      if( *out_row_ctr >= out_rows_avail )
      { return; }			/* Postprocessor exactly filled output buf */
    /*FALLTHROUGH*/
    case CTX_PREPARE_FOR_IMCU:
      /* Prepare to process first M-1 row groups of this iMCU row */
      main->rowgroup_ctr = 0;
      main->rowgroups_avail = ( JDIMENSION )( cinfo->min_DCT_scaled_size - 1 );
      /* Check for bottom of image: if so, tweak pointers to "duplicate"
         the last sample row, and adjust rowgroups_avail to ignore padding rows.
         (set_bottom_pointers overwrites the rowgroups_avail value set above.)
      */
      if( main->iMCU_row_ctr == cinfo->total_iMCU_rows )
      { set_bottom_pointers( cinfo ); }
      main->context_state = CTX_PROCESS_IMCU;
    /*FALLTHROUGH*/
    case CTX_PROCESS_IMCU:
      /* Call postprocessor using previously set pointers */
      ( *cinfo->post->post_process_data )( cinfo, main->xbuffer[main->whichptr],
                                           &main->rowgroup_ctr, main->rowgroups_avail,
                                           output_buf, out_row_ctr, out_rows_avail );
      if( main->rowgroup_ctr < main->rowgroups_avail )
      { return; }			/* Need to suspend */
      /* After the first iMCU, change wraparound pointers to normal state */
      if( main->iMCU_row_ctr == 1 )
      { set_wraparound_pointers( cinfo ); }
      /* Prepare to load new iMCU row using other xbuffer list.
         Clearing buffer_full makes the next call read that row before the
         postponed row group is handed to the postprocessor.
      */
      main->whichptr ^= 1;	/* 0=>1 or 1=>0 */
      main->buffer_full = FALSE;
      /* Still need to process last row group of this iMCU row, */
      /* which is saved at index M+1 of the other xbuffer */
      /* (that is, of the list just selected by the toggle above). */
      main->rowgroup_ctr = ( JDIMENSION )( cinfo->min_DCT_scaled_size + 1 );
      main->rowgroups_avail = ( JDIMENSION )( cinfo->min_DCT_scaled_size + 2 );
      main->context_state = CTX_POSTPONED_ROW;
  }
}


/*
   Process some data.
   Final pass of two-pass quantization: just call the postprocessor.
   Source data will be the postprocessor controller's internal buffer.
*/

#ifdef QUANT_2PASS_SUPPORTED

METHODDEF( void )
process_data_crank_post( j_decompress_ptr cinfo,
                         JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
                         JDIMENSION out_rows_avail ) {
  /* This controller supplies no input in this mode, so the three input
     arguments of post_process_data are passed as null/zero placeholders.
  */
  JSAMPIMAGE no_input = ( JSAMPIMAGE ) NULL;
  JDIMENSION *no_input_ctr = ( JDIMENSION * ) NULL;
  JDIMENSION no_input_avail = ( JDIMENSION ) 0;

  ( *cinfo->post->post_process_data )( cinfo, no_input,
                                       no_input_ctr, no_input_avail,
                                       output_buf, out_row_ctr, out_rows_avail );
}

#endif /* QUANT_2PASS_SUPPORTED */

/*
   Initialize the main buffer controller.
   Creates the controller object, installs it in cinfo->main and allocates
   the sample workspace for every component.  need_full_buffer is not
   supported and is reported with JERR_BAD_BUFFER_MODE.
*/
void jinit_d_main_controller( j_decompress_ptr cinfo, wxjpeg_boolean need_full_buffer ) {
  my_main_ptr mainp;
  jpeg_component_info *comp;
  int c;
  int group_rows;		/* sample rows in one row group of a component */
  int groups_needed;		/* row groups the workspace has to hold */

  mainp = ( my_main_ptr )
          ( *cinfo->mem->alloc_small )( ( j_common_ptr ) cinfo, JPOOL_IMAGE,
                                        SIZEOF( my_main_controller ) );
  cinfo->main = ( struct jpeg_d_main_controller * ) mainp;
  mainp->pub.start_pass = start_pass_main;

  if( need_full_buffer ) {	/* shouldn't happen */
    ERREXIT( cinfo, JERR_BAD_BUFFER_MODE );
  }

  /* The workspace holds M row groups, or M+2 when context rows are needed
     (M = min_DCT_scaled_size).
  */
  groups_needed = cinfo->min_DCT_scaled_size;
  if( cinfo->upsample->need_context_rows ) {
    if( groups_needed < 2 ) {	/* context rows with M < 2 are not implemented */
      ERREXIT( cinfo, JERR_NOTIMPL );
    }
    alloc_funny_pointers( cinfo );	/* space for the xbuffer[] lists */
    groups_needed += 2;
  }

  for( c = 0; c < cinfo->num_components; c++ ) {
    comp = cinfo->comp_info + c;
    group_rows = ( comp->v_samp_factor * comp->DCT_scaled_size ) /
                 cinfo->min_DCT_scaled_size;
    mainp->buffer[c] = ( *cinfo->mem->alloc_sarray )
                       ( ( j_common_ptr ) cinfo, JPOOL_IMAGE,
                         comp->width_in_blocks * comp->DCT_scaled_size,
                         ( JDIMENSION )( group_rows * groups_needed ) );
  }
}

/* Visual Age cleanup at the end of the file.
   NOTE(review): the macros defined near the top of this file are named
   start_pass_main and process_data_simple_main; the identifiers tested here
   are their "...2" replacement names, which this file never defines as
   macros.  Unless they are defined elsewhere, these #ifdef tests are never
   true and nothing is undefined -- confirm whether the unsuffixed names
   were intended.
*/
#if defined(__VISAGECPP__)
#  ifdef start_pass_main2
#   undef start_pass_main2
#  endif
#  ifdef process_data_simple_main2
#   undef process_data_simple_main2
#  endif
#endif
